import jieba
import json
import re

# Determine the search terms ("search") and the blocked terms ("ban")
def banWord(searchContext, indexfile):
    """Split a raw query into its search part and its excluded ("ban") part.

    The query is cut at the first "-" character. Everything before it is
    returned unchanged (surrounding whitespace included) as the search
    expression; everything after it is segmented with
    ``jieba.cut_for_search`` to obtain the ban words.

    Args:
        searchContext: Raw query string, e.g. "<search> -<excluded>".
        indexfile: Path of the JSON index file, forwarded to ``banfile``.

    Returns:
        A dict with three keys:
          * "search":  the text before the first "-" (or the whole query).
          * "ban":     list of ban-word tokens, or -1 when nothing is excluded.
          * "banfile": list of document IDs returned by ``banfile`` for the
                       ban words, or -1 when nothing is excluded.
    """
    query_part, _, excluded_part = searchContext.partition("-")

    # No "-" at all, or nothing after it: report the -1 sentinels.
    if not excluded_part:
        return {"search": query_part, "ban": -1, "banfile": -1}

    ban_terms = list(jieba.cut_for_search(excluded_part))
    return {
        "search": query_part,
        "ban": ban_terms,
        # banfile receives its own copy of the token list.
        "banfile": banfile(list(ban_terms), indexfile),
    }

# Get the IDs of the documents that contain the blocked terms
def banfile(banword, indexfile):
    """Return the IDs of all documents indexed under a term containing a ban word.

    The index file is loaded as JSON and is expected to be a mapping of
    index term -> mapping keyed by document ID (only the keys of the inner
    mapping are used here).

    A ban word matches an index term when it occurs in it as a literal
    substring. The word is deliberately NOT interpreted as a regular
    expression: ban words come from user queries, so tokens such as "+",
    "(" or "c++" must not raise ``re.error``, and "." must not match every
    term.

    Args:
        banword: Iterable of ban-word strings.
        indexfile: Path of the JSON index file.

    Returns:
        A list of unique document IDs (in no particular order); empty when
        no index term contains any ban word.

    Raises:
        OSError: If the index file cannot be opened.
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    with open(indexfile, "rb") as file:
        index = json.load(file)

    # A set de-duplicates IDs reached through several terms or ban words.
    banned_ids = set()
    for word in banword:
        for term, postings in index.items():
            if word in term:
                banned_ids.update(postings.keys())
    return list(banned_ids)

# Example: banWord("检索系统 -高速领导", indexfile)  # indexfile = path to the JSON index